package mrdemo008;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that extracts the request part (HTTP method plus path) from each
 * Tomcat access-log line and emits it as the key with a count of 1.
 *
 * <p>Example input line:
 * <pre>
 * 40.77.167.48 - - [06/Mar/2020:00:17:05 +0800] "GET /upload/teach/book.html HTTP/1.1" 200 4049
 * </pre>
 * Emitted pair: {@code ("GET /upload/teach/book.html", 1)}.
 *
 * <p>Lines that are too short or that contain no recognizable GET/POST
 * request are skipped instead of being emitted under an empty key.
 */
public class TomcatLogMap  extends Mapper<LongWritable, Text,Text,IntWritable>{
	
	/** Lines of this length or shorter are ignored by {@link #map}. */
	private static final int MIN_LINE_LENGTH = 20;
	
	/** HTTP methods recognized by {@link #handlerLog(String)}. */
	private static final String[] METHODS = {"GET", "POST"};
	
	/** Marker that starts the protocol token (e.g. {@code HTTP/1.1}) and ends the request part. */
	private static final String PROTOCOL_MARKER = "HTTP/";
	
	/** Constant count written for every parsed request. */
	private final IntWritable val = new IntWritable(1);
	
	/** Output key object, reused across records to avoid one allocation per input line. */
	private final Text outKey = new Text();
	
	/**
	 * Parses one log line and, when a request can be extracted, writes
	 * {@code (request, 1)} to the context.
	 *
	 * @param key     input key supplied by the framework (not used here)
	 * @param value   one line of the access log
	 * @param context context the output pair is written to
	 * @throws IOException          if writing the output pair fails
	 * @throws InterruptedException if writing the output pair is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text,IntWritable>.Context context)
			throws IOException, InterruptedException {
		
		String line = value.toString();
		if (line.length() <= MIN_LINE_LENGTH) {
			return;
		}
		
		// The extracted part looks like: GET /upload/teach/book.html
		String request = handlerLog(line);
		if (request.isEmpty()) {
			// No GET/POST request found: skip rather than count an empty key.
			return;
		}
		
		outKey.set(request);
		context.write(outKey, val);
	}
	
	/**
	 * Extracts the request part from a raw log line such as
	 * {@code 40.77.167.48 - - [06/Mar/2020:00:17:05 +0800] "GET /upload/teach/book.html HTTP/1.1" 200 4049},
	 * producing {@code GET /upload/teach/book.html}.
	 *
	 * <p>The request starts at the earliest occurrence of a recognized method
	 * token, so a POST whose URL happens to contain "GET" is still reported as
	 * a POST. It ends just before the first {@code HTTP/} marker that follows
	 * the method.
	 *
	 * @param line raw access-log line; may be {@code null}
	 * @return the trimmed request part, or an empty string when the line is
	 *         {@code null}, contains no GET/POST token, or has no
	 *         {@code HTTP/} marker after the method
	 */
	private String handlerLog(String line) {
		if (line == null) {
			return "";
		}
		
		// Pick the method token that appears first in the line.
		int start = -1;
		for (String method : METHODS) {
			int idx = line.indexOf(method);
			if (idx >= 0 && (start < 0 || idx < start)) {
				start = idx;
			}
		}
		if (start < 0) {
			return "";
		}
		
		// Search from the method onward so 'end' can never precede 'start';
		// a missing marker yields "" instead of a substring exception.
		int end = line.indexOf(PROTOCOL_MARKER, start);
		if (end < 0) {
			return "";
		}
		return line.substring(start, end).trim();
	}
	
	/**
	 * Local smoke test: prints the request part parsed from a sample log line.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Try the string-handling method locally before running the job.
		String s = "40.77.167.48 - - [06/Mar/2020:00:17:05 +0800] \"GET /upload/teach/book.html HTTP/1.1\" 200 4049";
		
		System.out.println(new TomcatLogMap().handlerLog(s));
	}
	
	 
}
